#A lexander F. Gazmararian
# afg2@princeton.edu

# Load packages
library(tidyverse)
library(margins)
library(modelsummary)
library(emmeans)
library(sandwich)
library(estimatr)
# Packages for structural topic model
library(stm)
library(geometry)
library(Rtsne)
library(rsvd)

# Load functions
source("code/fun/book_theme.r")
source("code/fun/savefig.r")
source("code/fun/fix_txt.r")

# Set up coefficient names
source("code/fun/coefnames4tables.r")
coefnames <-
  c(
    coefnames,
    "lock_second" = "Lock-in order",
    "delegate_lead_treat"="Community delegation",
    "delegate_voice_treat"="Public input",
    "delegate_fund_treat"="Long-term funding",
    "delegate_lead_treat:delegate_voice_treat" = "Community delegation x Public input",
    "delegate_lead_treat:delegate_fund_treat" = "Community delegation x Long-term funding",
    "delegate_voice_treat:delegate_fund_treat" = "Public input x Long-term funding",
    "delegate_lead_treat:delegate_voice_treat:delegate_fund_treat" = "Community delegation x Public input x Long-term funding",
    "lockin_treat_poolGeneral Benefits" = "General benefits treatment",
    "lockin_treat_poolSpecific Benefits" = "Specific benefits treatment",
    "consensus_treat" = "Consensus treatment",
    "consensus_treat:PartySummaryNeither" = "Consensus treatment x Independent",
    "consensus_treat:PartySummaryRepublican" = "Consensus treatment x Republican"
  )

# Load data
g <- readRDS("data/NatQual_Winter22.rds")

# Specify baseline model
f.base <- y ~ age + Female + Black + Hispanic +  PartySummary + CollegeDegree + employfull + Rural_bin + Investment + ClimateImpact_tri + income5 + trust_index

# Estimate models
estimate_lockin <- function(formula) {
  m <- list()
  m[["Lockin_index"]] <- lm(update(formula, Lockin_index ~ .), g)
  m[["CreditSupport_bin"]] <- lm(update(formula, CreditSupport_bin ~ .), g)
  m[["CreditSupport_scale"]] <- lm(update(formula, CreditSupport_scale ~ .), g)
  m[["LockVote_bin"]] <- lm(update(formula, LockVote_bin ~ .), g)
  m[["LockVote_scale"]] <- lm(update(formula, LockVote_scale ~ .), g)
  m[["Purchase_bin"]] <- lm(update(formula, Purchase_bin ~ .), g)
  m[["Purchase_scale"]] <- lm(update(formula, Purchase_scale ~ .), g)
  m[["Climate_bin"]] <- lm(update(formula, Climate_bin ~ .), g)
  m[["Climate_scale"]] <- lm(update(formula, Climate_scale ~ .), g)
  m[["LockWorry_bin"]] <- lm(update(formula, LockWorry_bin ~ .), g)
  m[["LockWorry_scale"]] <- lm(update(formula, LockWorry_scale ~ .), g)
  return(m)
}

# Benefit versus no benefits comparison 
f.lock <- update(f.base, ~ lockin_treat_pool  + . + CollegeDegree * PartySummary + lock_second)
lockin <- estimate_lockin(f.lock)

## Create online appendix table ----
lock.ols <- list(lockin$Lockin_index, lockin$CreditSupport_scale, lockin$LockVote_scale, lockin$Purchase_scale, lockin$Climate_scale, lockin$LockWorry_scale)
names(lock.ols) <- c("Index", "Credit", "Vote", "Purchase", "Climate", "Worry")
file <- "tables/ch5/ols_lockin.txt"
modelsummary(
  lock.ols,
  vcov = "HC2",
  stars = c("*"=.1,"**"=.05,"***"=.01),
  coef_map = coefnames,
  gof_map = c("nobs", "adj.r.squared"),
  escape=FALSE,
  output="latex"
) %>%
  cat(., file = file)
fix_txt(file)

## Create Figure 5.4----

# Estimate marginal effects
lockin.df <- lapply(lockin, function(x) {margins::margins_summary(x, variables = "lockin_treat_pool", vcov = sandwich::vcovHC(x, "HC2")) %>% data.frame()})
lockin.df <- dplyr::bind_rows(lockin.df, .id = "outcome")
lockin.df$lower90 <- with(lockin.df, AME + SE * qnorm(.05))
lockin.df$upper90 <- with(lockin.df, AME + SE * qnorm(.95))
lockin.df %>%
  ggplot(aes(x = outcome, y = AME, color = factor)) +
  geom_hline(yintercept = 0, lty = "dashed", color = "grey") +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0, position = position_dodge(.4)) +
  geom_errorbar(aes(ymin = lower90, ymax = upper90), width = 0, size = 1.75, position = position_dodge(.4)) +
  geom_point(size = 4, position = position_dodge(.4)) +  coord_flip() +
  coord_flip() +
  book_theme +
  theme(legend.position = "bottom")

# Create Figure 5.4
lockin.plot <-
  lockin.df %>%
  dplyr::filter(outcome %in% c("Purchase_scale", "LockWorry_scale", "Lockin_index", "CreditSupport_scale", "Climate_scale", "LockVote_scale")) %>%
  dplyr::mutate(
    outcome = dplyr::case_when(
      outcome == "Purchase_scale" ~ "Purchase EV",
      outcome == "LockWorry_scale" ~ "Not concerned about reversal",
      outcome == "Lockin_index" ~ "Green economy support index",
      outcome == "CreditSupport_scale" ~ "Support EV tax credits",
      outcome == "Climate_scale" ~ "Support climate policy",
      outcome == "LockVote_scale" ~ "Vote for pro-EV politician"
    ),
    outcome = 
      factor(
        outcome,
        ordered = TRUE,
        levels = c(
          "Not concerned about reversal",
          "Vote for pro-EV politician",
          "Support climate policy",
          "Support EV tax credits",
          "Purchase EV",
          "Green economy support index"
        )
      ),
    factor = ifelse(factor == "lockin_treat_poolGeneral Benefits", "General Benefits", "Specific Benefits")
  ) %>%
  ggplot(aes(x = outcome, y = AME, color = factor, shape = factor)) +
  geom_hline(yintercept = 0, lty = "dashed", color = "grey") +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0, position = position_dodge(.4)) +
  geom_errorbar(aes(ymin = lower90, ymax = upper90), width = 0, size = 1.75, position = position_dodge(.4)) +
  geom_point(size = 4, position = position_dodge(.4)) +  coord_flip() +
  coord_flip() +
  book_theme +
  theme(legend.position = "bottom") +
  scale_color_grey() +
  labs(color="Treatment:",shape="Treatment:", x= "", y = "Estimate", caption = "Notes: Thin and thick lines denote 95 and 90 percent confidence intervals")
lockin.plot
savefig(lockin.plot, "5.4_figure_lockin", height = 2.8, filepath = "figures/")

## Create online appendix plot for heterogeneous effects by partisan identification ----
# Estimate model
f.lock.pid <- update(f.base, ~ lockin_treat_pool * PartySummary + lock_second + . + CollegeDegree * PartySummary)
lockin.pid <- estimate_lockin(f.lock.pid)
summary(lockin.pid$Climate_scale)

# Retrieve average marginal effects
lockin.pid.df <- lapply(lockin.pid, function(x) {margins::margins_summary(x, variables = "lockin_treat_pool", at = list(PartySummary = c("Democrat", "Republican", "Neither")),
                                                                          vcov = sandwich::vcovHC(x, "HC2")) %>% data.frame()})

# Prepare dataframe for plot
lockin.pid.df <- dplyr::bind_rows(lockin.pid.df, .id = "outcome")
subset(lockin.pid.df, PartySummary=="Republican")

# Construct confidence intervals
lockin.pid.df$lower90 <- with(lockin.pid.df, AME + SE * qnorm(.05))
lockin.pid.df$upper90 <- with(lockin.pid.df, AME + SE * qnorm(.95))

# Create plot
lockin.pid.plot <-
  lockin.pid.df %>%
  dplyr::filter(outcome %in% c("Purchase_scale", "LockWorry_scale", "Lockin_index", "CreditSupport_scale", "Climate_scale", "LockVote_scale")) %>%
  dplyr::mutate(
    outcome = dplyr::case_when(
      outcome == "Purchase_scale" ~ "Purchase EV",
      outcome == "LockWorry_scale" ~ "Worry about reversal",
      outcome == "Lockin_index" ~ "Green economy support index",
      outcome == "CreditSupport_scale" ~ "Support EV tax credits",
      outcome == "Climate_scale" ~ "Support climate policy",
      outcome == "LockVote_scale" ~ "Vote for pro-EV politician"
    ),
    outcome = 
      factor(
        outcome,
        ordered = TRUE,
        levels = c(
          "Worry about reversal",
          "Vote for pro-EV politician",
          "Support climate policy",
          "Support EV tax credits",
          "Purchase EV",
          "Green economy support index"
        )
      ),
    factor = ifelse(factor == "lockin_treat_poolGeneral Benefits", "General Benefits", "Specific Benefits")
  ) %>%
  dplyr::filter(factor == "General Benefits" & PartySummary != "Neither") %>%
  ggplot(aes(x = outcome, y = AME, color = PartySummary, shape = PartySummary)) +
  geom_hline(yintercept = 0, lty = "dashed", color = "grey") +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0, position = position_dodge(.4)) +
  geom_errorbar(aes(ymin = lower90, ymax = upper90), width = 0, size = 1.75, position = position_dodge(.4)) +
  geom_point(size = 4, position = position_dodge(.4)) +  coord_flip() +
  coord_flip() +
  book_theme +
  theme(legend.position = "bottom") +
  scale_color_grey() +
  labs(color="",shape="", x= "", y = "Estimate")

# Save figure output
lockin.pid.plot
savefig(lockin.pid.plot, "si_lockin_pid", height = 2.8, filepath = "figures/ch5/")

## Structural topic model of open-ended ----

# Process open-ended
ProcLockTop <- textProcessor(g$LockOE, metadata = g)
LockTop <- prepDocuments(ProcLockTop$documents, ProcLockTop$vocab, ProcLockTop$meta)

# Estimate STM
LockTop.stm <- stm(
  documents = LockTop$documents,
  vocab = LockTop$vocab,
  K = 20,
  seed = 20230115,
  prevalence = ~ lockin_treat_pool + lock_second,
  data = LockTop$meta,
  init.type = "Spectral"
)

#label topics
png(filename = "figures/ch5/si_stm_lockoe.png", width = 6.5, height = 4.5, res = 300, units = "in", pointsize = 10)
plot(LockTop.stm, type = "summary")
dev.off()

# Examine documents that are highly associated with topics
findThoughts(LockTop.stm, texts = LockTop$meta$LockOE, n = 3, topic = 8) #mentioned new jobs

# Estimate model
LockTop.effect <- estimateEffect(formula = c(8) ~ lockin_treat_pool, LockTop.stm, meta = LockTop$meta, uncertainty = "Global")

# Create table ouptut
m.oe <- summary(LockTop.effect, topic = 8) # Much more likely to mention jobs in the treatment conditions
m.oe.df <- data.frame(m.oe$tables[[1]])
m.oe.df <- rownames_to_column(m.oe.df, var = "term")
names(m.oe.df) <- c("term", "estimate", "std.error", "t", "p.value")
m.oe.df
gl <- data.frame(
  N = nrow(LockTop$meta)[[1]]
)
mod <- list(tidy = m.oe.df, glance = gl)
class(mod) <- "modelsummary_list"
file <- "tables/ch5/tbl_stm_top.txt"
modelsummary(
  mod,
  stars = c("*"=.1,"**"=.05,"***"=.01),
  coef_map = c(
    "(Intercept)"="Intercept",
    "lockin_treat_poolGeneral Benefits"="General Benefits",
    "lockin_treat_poolSpecific Benefits"="Specific Benefits"
  ),
  output = "latex"
) %>%
  cat(., file = file)
fix_txt(file)
